rm(list=ls(all=TRUE))
dev.off()
# Created using R version 3.3.2 under OS X Yosemite 10.10.5
library(plm) # Created using version 1.6-5
library(dplyr) # Created using version 0.5.0
library(tidyr) # Created using version 0.6.1
library(lme4) # Created using version 1.1-12
library(lfe) # Created using version 2.5-1998
library(reshape2) # Created using version 1.4.2
library(stargazer) # Created using version 5.2
library(ggplot2) # Created using version 2.2.1
library(xtable) # Created using version 1.8-2
library(Matrix) # Created using version 1.2-7.1
library(Formula) # Created using version 1.2-1

# NOTE: Set working directory
setwd("~/Downloads/rep")

# File "cooperman_ri.R" creates the outputs loaded here and included in the replication folder.

# Load Data --------
load("cooperman_dataset.Rdata") 
load("ate_sims.Rdata")
#load("ate_sims_rep.Rdata") # Uncomment if not using the downloaded file.
load("pvals_by_nsims.Rdata")
#load("pvals_by_nsims_rep.Rdata") # Uncomment if not using the downloaded file.

names(dataset)
counties.onlygomez <- unique(dataset$FIPS.County[is.na(dataset$Rainfall12)])
# Subset data to drop 7 counties in Gomez et al. (2007) dataset that do not have out-of-sample rainfall data
data2 <- dataset[!is.na(dataset$Rainfall12),]
data2 <- data2 %>% arrange(FIPS.County,Year)

# Appendix Table 1 ------
# Replication of Gomez et al. (2007) results

counties.onlygomez <- unique(dataset$FIPS.County[is.na(dataset$Rainfall12)])
rep1 <- lmer(Turnout ~ Rain + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)
rep2 <- lmer(Turnout ~ Rain + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset, subset= !(FIPS.County %in% counties.onlygomez))
rep3 <- lmer(Turnout ~ Rainfall12 + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)
rep4 <- lmer(Turnout ~ Rainfall12.index + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)
rep5 <- lmer(Turnout ~ Rainfall12.spi + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)

covlabs <- c("Rain (inches) - Gomez et al. (2007)", "Rain (inches) - Author", "Rain Index - Author","Rain SPI - Author", "Snow", "/% High School Graduates", "Median HH Income", "/% African American", "Farms per Capita",  "Closed Days Pre-Election","Motor Voter", "Property Requirement Vote Law", "Literacy Vote Law", "Poll Tax Vote Law", "Gubernatorial Elec.", "Senatorial Elec.", "Turnout (lag)",  "Intercept")
cat(stargazer(rep1, rep2, rep3, rep4, rep5, add.lines = list(c("Sample", "Original", "Limited", "Limited", "Limited", "Limited")),no.space=T, omit=c( "Year"), omit.stat = c("rsq", "f"),float=F, dep.var.labels = "Voter Turnout", align=T,covariate.labels = covlabs), file="tableA1.tex", sep="\n")

#########################################################
# Appendix Tables 2 and 3 ------
# Clustered standard errors with Inches
counties.onlygomez <- unique(dataset$FIPS.County[is.na(dataset$Rainfall12)])
data.lim <- subset(dataset, subset= !(FIPS.County %in% counties.onlygomez) & !(is.na(Turnout)))
data.lim$State.Year <- paste0(data.lim$State,data.lim$Year)
data.lim$CWA.Year <- paste0(data.lim$CWA_short,data.lim$Year)
data.lim$REG.Year <- paste0(data.lim$REG,data.lim$Year)

rep.felm <- felm(Turnout ~ Rainfall12 + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year , data=data.lim)
rep.felm.cwa.year <- felm(Turnout ~ Rainfall12 + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|CWA.Year, data=data.lim)
rep.felm.cwa <- felm(Turnout ~ Rainfall12 + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|CWA_short, data=data.lim)
rep.felm.state.year <- felm(Turnout ~ Rainfall12 + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|State.Year, data=data.lim)
rep.felm.state <- felm(Turnout ~ Rainfall12 + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|State, data=data.lim)
rep.felm.reg.year <- felm(Turnout ~ Rainfall12 + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|REG.Year, data=data.lim)
rep.felm.year <- felm(Turnout ~ Rainfall12 + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|Year, data=data.lim)

all.ses <- rbind(sqrt(diag(vcov(rep.felm))), sqrt(diag(vcov(rep.felm.cwa.year))), sqrt(diag(vcov(rep.felm.cwa))), sqrt(diag(vcov(rep.felm.state.year))), sqrt(diag(vcov(rep.felm.state))), sqrt(diag(vcov(rep.felm.reg.year))), sqrt(diag(vcov(rep.felm.year))))
coef <- round(rep.felm$coefficients[1],3)
ses <- round(all.ses[,1],3)
tstats <- rep.felm$coefficients[1]/all.ses[,1]
pvals <- round(2*(1-pt(abs(tstats),df=rep.felm$df)),3)

f <- function(x){length(unique(x))}
clusts <- c("NA",f(data.lim$CWA.Year), f(data.lim$CWA_short), f(data.lim$State.Year), f(data.lim$State), f(data.lim$REG.Year), f(data.lim$Year))
cluster.se.table <- matrix(rbind(rep(coef,7), ses, pvals, clusts),nrow = 4,ncol=7)
cluster.se.table[cluster.se.table=="0"] <- "<.001"
colnames(cluster.se.table) <- c("None", "CWA-Year", "CWA", "State-Year", "State", "Region-Year", "Year")
row.names(cluster.se.table) <- c("Coefficient", "Standard Error", "P-Value", "No. Clusters")

two.tail.US.FE <- sum(abs(ate$ate.sims.US.FE) >= abs(coef))/length(ate$ate.sims.US.FE)
cluster.se.table <- cbind(cluster.se.table, c(coef,"",two.tail.US.FE, ""))
colnames(cluster.se.table) <- c("None", "CWA-Year", "CWA", "State-Year","State", "Region-Year", "Year", "RI US-level")
cat(print.xtable(xtable(cluster.se.table,digits=3),floating=F, include.rownames=T), file="tableA2.tex", sep="\n")

# Cluster standard errors with SPI 
counties.onlygomez <- unique(dataset$FIPS.County[is.na(dataset$Rainfall12)])
data.lim <- subset(dataset, subset= !(FIPS.County %in% counties.onlygomez) & !(is.na(Turnout)))
data.lim$State.Year <- paste0(data.lim$State,data.lim$Year)
data.lim$CWA.Year <- paste0(data.lim$CWA_short,data.lim$Year)
data.lim$REG.Year <- paste0(data.lim$REG,data.lim$Year)

rep.felm.spi <- felm(Turnout ~ Rainfall12.spi + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year , data=data.lim)
rep.felm.spi.cwa.year <- felm(Turnout ~ Rainfall12.spi + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|CWA.Year, data=data.lim)
rep.felm.spi.cwa <- felm(Turnout ~ Rainfall12.spi + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|CWA_short, data=data.lim)
rep.felm.spi.state.year <- felm(Turnout ~ Rainfall12.spi + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|State.Year, data=data.lim)
rep.felm.spi.state <- felm(Turnout ~ Rainfall12.spi + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|State, data=data.lim)
rep.felm.spi.reg.year <- felm(Turnout ~ Rainfall12.spi + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|REG.Year, data=data.lim)
rep.felm.spi.year <- felm(Turnout ~ Rainfall12.spi + Snow + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax + GubElection + SenElection + Turnout.Lag | FIPS.County + Year |0|Year, data=data.lim)

all.ses.spi <- rbind(sqrt(diag(vcov(rep.felm.spi))), sqrt(diag(vcov(rep.felm.spi.cwa.year))), sqrt(diag(vcov(rep.felm.spi.cwa))), sqrt(diag(vcov(rep.felm.spi.state.year))), sqrt(diag(vcov(rep.felm.spi.state))), sqrt(diag(vcov(rep.felm.spi.reg.year))), sqrt(diag(vcov(rep.felm.spi.year))))
coef.spi <- round(rep.felm.spi$coefficients[1],3)
ses.spi <- round(all.ses.spi[,1],3)
tstats.spi <- rep.felm.spi$coefficients[1]/all.ses.spi[,1]
pvals.spi <- round(2*(1-pt(abs(tstats.spi),df=rep.felm.spi$df)),3)

f <- function(x){length(unique(x))}
clusts <- c("NA",f(data.lim$CWA.Year), f(data.lim$CWA_short), f(data.lim$State.Year), f(data.lim$State), f(data.lim$REG.Year), f(data.lim$Year))
cluster.se.table.spi <- matrix(rbind(rep(coef.spi,7), ses.spi, pvals.spi, clusts),nrow = 4,ncol=7)
cluster.se.table.spi[cluster.se.table.spi=="0"] <- "<.001"
colnames(cluster.se.table.spi) <- c("None", "CWA-Year", "CWA",  "State-Year","State", "Region-Year", "Year")
row.names(cluster.se.table.spi) <- c("Coefficient", "Standard Error", "P-Value", "No. Clusters")

two.tail.US.spi.FE <- sum(abs(ate$ate.sims.US.spi.FE) >= abs(coef.spi))/length(ate$ate.sims.US.spi.FE)
cluster.se.table.spi <- cbind(cluster.se.table.spi, c(coef.spi,"",two.tail.US.spi.FE, ""))
colnames(cluster.se.table.spi) <- c("None", "CWA-Year", "CWA", "State-Year","State",  "Region-Year", "Year", "RI US-level")
cat(print.xtable(xtable(cluster.se.table.spi,digits=3),floating=F, include.rownames=T), file="tableA3.tex", sep="\n")

###################################################################
# Appendix Table 4 ------

# Robustness without post-2000 data

gomez.model <- lmer(Turnout ~ Rainfall12 + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=data2)
ate.obs <- fixef(gomez.model)["Rainfall12"]
gomez.model.index <- lmer(Turnout ~ Rainfall12.index + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=data2)
ate.obs.index <- fixef(gomez.model.index)["Rainfall12.index"]
gomez.model.spi <- lmer(Turnout ~ Rainfall12.spi + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=data2)
ate.obs.spi <- fixef(gomez.model.spi)["Rainfall12.spi"]

two.tail.US.spi <- sum(abs(ate$ate.sims.US.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.US.spi)
two.tail.reg.spi <- sum(abs(ate$ate.sims.reg.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.reg.spi)
two.tail.state.spi <- sum(abs(ate$ate.sims.state.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.state.spi)
two.tail.cwa.spi <- sum(abs(ate$ate.sims.cwa.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.cwa.spi)
two.tail.indep.spi <- sum(abs(ate$ate.sims.indep.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.indep.spi)

two.tail.US.index <- sum(abs(ate$ate.sims.US.index) >= abs(ate.obs.index))/length(ate$ate.sims.US.index)
two.tail.reg.index <- sum(abs(ate$ate.sims.reg.index) >= abs(ate.obs.index))/length(ate$ate.sims.reg.index)
two.tail.state.index <- sum(abs(ate$ate.sims.state.index) >= abs(ate.obs.index))/length(ate$ate.sims.state.index)
two.tail.cwa.index <- sum(abs(ate$ate.sims.cwa.index) >= abs(ate.obs.index))/length(ate$ate.sims.cwa.index)
two.tail.indep.index <- sum(abs(ate$ate.sims.indep.index) >= abs(ate.obs.index))/length(ate$ate.sims.indep.index)

two.tail.US <- sum(abs(ate$ate.sims.US) >= abs(ate.obs))/length(ate$ate.sims.US)
two.tail.reg <- sum(abs(ate$ate.sims.reg) >= abs(ate.obs))/length(ate$ate.sims.reg)
two.tail.state <- sum(abs(ate$ate.sims.state) >= abs(ate.obs))/length(ate$ate.sims.state)
two.tail.cwa <- sum(abs(ate$ate.sims.cwa) >= abs(ate.obs))/length(ate$ate.sims.cwa)
two.tail.indep <- sum(abs(ate$ate.sims.indep) >= abs(ate.obs))/length(ate$ate.sims.indep)

two.tail.US.pre2001 <- sum(abs(ate$ate.sims.US.pre2001) >= abs(ate.obs))/length(ate$ate.sims.US.pre2001)
two.tail.US.index.pre2001 <- sum(abs(ate$ate.sims.US.index.pre2001) >= abs(ate.obs.index))/length(ate$ate.sims.US.index.pre2001)
two.tail.US.spi.pre2001 <- sum(abs(ate$ate.sims.US.spi.pre2001) >= abs(ate.obs.spi))/length(ate$ate.sims.US.spi.pre2001)

values <- data.frame(t(as.matrix(c("Rainfall (in)", round(ate.obs,3), two.tail.indep, two.tail.cwa, two.tail.state, two.tail.reg, two.tail.US, two.tail.US.pre2001))),stringsAsFactors = F)
values.index <- data.frame(t(as.matrix(c("Rainfall Index", round(ate.obs.index,3), two.tail.indep.index, two.tail.cwa.index, two.tail.state.index, two.tail.reg.index, two.tail.US.index, two.tail.US.index.pre2001))),stringsAsFactors = F)
values.spi <- data.frame(t(as.matrix(c("Rainfall SPI", round(ate.obs.spi,3), two.tail.indep.spi, two.tail.cwa.spi, two.tail.state.spi, two.tail.reg.spi, two.tail.US.spi, two.tail.US.spi.pre2001))),stringsAsFactors = F)
colnames(values) <- colnames(values.index) <- colnames(values.spi) <- c("Var", "Estimated ATE", "County", "County Warning Area", "State", "Weather Region", "US", "US (Pre-2001)")

table <- rbind(values, values.index, values.spi)
table[table=="0"] <- "<.001"
colnames(table) <-  c("Var", "Estimated ATE", "County", "County Warning Area", "State", "Weather Region", "US", "US (Pre-2001)")
cat(print.xtable(xtable(table,digits=3,font.size="tiny"),floating=F, caption.placement="top", include.rownames=F), file="tableA4.tex", sep="\n")

###################################################################
# Appendix Tables 5 and 6 ------
# STATE CORRELATIONS 
states <- unique(data2$State)
state.corr.stats.inches <- data.frame(matrix(NA, nrow=48, ncol=7))
names(state.corr.stats.inches) <- c("State", "Min.", "1st Qu.","Median","Mean","3rd Qu.","Max.")
for (s in 1:length(states)){
  state.corr.stats.inches[s,1] <- states[s]
  data.state <- subset(data2, State==states[s])
  FIPS.state <- unique(data.state$FIPS.County)
  corrmat.state <- matrix(NA,length(FIPS.state), length(FIPS.state))
  for (i in 1:length(FIPS.state)){
    for (j in 1:length(FIPS.state)){
      corrmat.state[i,j] <- cor(data.state$Rainfall12[data.state$FIPS.County==FIPS.state[i]], data.state$Rainfall12[data.state$FIPS.County==FIPS.state[j]])
    }
  }
  state.corr.stats.inches[s,2:7] <-  summary(corrmat.state[upper.tri(corrmat.state)])
  print(states[s])
}

state.corr.stats.spi <- data.frame(matrix(NA, nrow=48, ncol=7))
names(state.corr.stats.spi) <- c("State", "Min.", "1st Qu.","Median","Mean","3rd Qu.","Max.")
for (s in 1:length(states)){
  state.corr.stats.spi[s,1] <- states[s]
  data.state <- subset(data2, State==states[s])
  FIPS.state <- unique(data.state$FIPS.County)
  corrmat.state <- matrix(NA,length(FIPS.state), length(FIPS.state))
  for (i in 1:length(FIPS.state)){
    for (j in 1:length(FIPS.state)){
      corrmat.state[i,j] <- cor(data.state$Rainfall12.spi[data.state$FIPS.County==FIPS.state[i]], data.state$Rainfall12.spi[data.state$FIPS.County==FIPS.state[j]])
    }
  }
  state.corr.stats.spi[s,2:7] <-  summary(corrmat.state[upper.tri(corrmat.state)])
  print(states[s])
}

data2 <- data2 %>% arrange(Year,FIPS.County)
states <- unique(data2$State)
states.n <- rep(NA, 48)
for (i in 1:48){states.n[i] <- length(unique(data2$FIPS.County[data2$State==states[i]]))}

state.corr.stats.inches <- cbind(state.corr.stats.inches, states.n)
state.corr.stats.spi <- cbind(state.corr.stats.spi, states.n)
names(state.corr.stats.spi) <- names(state.corr.stats.inches) <- c("State", "Min.", "1st Qu.","Median","Mean","3rd Qu.","Max.", "Num. Counties")
cat(print.xtable(xtable(state.corr.stats.inches,digits=3),floating=F, include.rownames=F), file="tableA5.tex", sep="\n")
cat(print.xtable(xtable(state.corr.stats.spi,digits=3),floating=F, include.rownames=F), file="tableA6.tex", sep="\n")

###################################################################
# Appendix Table 7 and Figure 1 -------
# Precision of US-year Rainfall (inch) estimates under different numbers of simulations

nsims.iter=c(50,100,200,500)
means <- apply(X=pvals.by.nsims,MARGIN = 2,FUN = mean)
sd <- apply(X=pvals.by.nsims,MARGIN = 2,FUN = sd)
min <- apply(X=pvals.by.nsims,MARGIN = 2,FUN = min)
max <- apply(X=pvals.by.nsims,MARGIN = 2,FUN = max)
sums.table <- cbind(means, sd, min, max)
rownames(sums.table) <- paste0("No. Sims = ",nsims.iter)
colnames(sums.table) <- c("Mean", "SD", "Min", "Max")
round(sums.table,3)
cat(print.xtable(xtable(sums.table,digits=3),floating=F, include.rownames=T), file="tableA7.tex", sep="\n")

gomez.model <- lmer(Turnout ~ Rainfall12 + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=data2)
ate.obs <- fixef(gomez.model)["Rainfall12"]

two.tail.US <- sum(abs(ate$ate.sims.US) >= abs(ate.obs))/length(ate$ate.sims.US)
pdf(file = "figureA1.pdf", onefile=FALSE,paper="special",width=5,height=5)
plot(density(pvals.by.nsims[,4], cut=2,adjust=2),
     xlim=c(0,0.2), main="", xlab="P-Value")
lines(density(pvals.by.nsims[,3], cut=2, adjust=2), lty= 2)
lines(density(pvals.by.nsims[,2], cut=2, adjust=2), lty= 4)
lines(density(pvals.by.nsims[,1], cut=0, adjust=2), lty=3)
abline(v=two.tail.US, col="red")
legend("topright", title="  No. of Sims ", nsims.iter[1:4],nsims.iter[1:4], lty=c(3,4,2,1))
dev.off()
